package donot996.icu.service.impl;

import donot996.icu.service.ICrawlerService;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;

import java.io.IOException;

/**
 * {@link ICrawlerService} implementation that downloads a web page over HTTP
 * and returns the inner HTML of one specific element of that page.
 */
@Service
public class CrawlerServiceImpl implements ICrawlerService {

    /** Browser-like User-Agent header value sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";

    /** Id of the page element whose inner HTML is returned. */
    private static final String CONTENT_ELEMENT_ID = "zs_content";

    /**
     * Fetches the page at {@code url} with an HTTP GET and returns the inner
     * HTML of the element whose id is {@code zs_content}.
     *
     * @param url address of the page to download
     * @return the inner HTML of the {@code zs_content} element
     * @throws IOException if the request fails, the response carries no body,
     *                     or the page does not contain a {@code zs_content} element
     */
    @Override
    public String getContent(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setHeader("User-Agent", USER_AGENT);

        String content;
        // try-with-resources closes both the response and the client on every
        // path, including when reading the body throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Response from " + url + " has no body");
            }
            // "utf-8" is the fallback charset handed to EntityUtils for decoding the body.
            content = EntityUtils.toString(entity, "utf-8");
        }

        // Parse the downloaded markup and pick out the target element.
        Document doc = Jsoup.parse(content);
        Element select = doc.getElementById(CONTENT_ELEMENT_ID);
        if (select == null) {
            // Report a missing element explicitly rather than failing with a NullPointerException.
            throw new IOException(
                    "Element with id '" + CONTENT_ELEMENT_ID + "' not found in page " + url);
        }
        return select.html();
    }
}
